do "E:/ReplicateBuild/02_code/00_environment/00_set_environment.do"

*** Estimate experience returns
* Collapses the student-level file to one row per by-group, regresses mean vijt
* on experience-bin dummies (absorbing tid, schid and year effects) and writes
* the seven bin coefficients and their standard errors to a LaTeX table.

local ss = "ma"

use "$temp/student_level_`ss'_w_unshrunken_VA", clear

cap drop lea schlcode

* Experience bins: raw pay level up to 6 is kept, anything above 6 is pooled
* into bin 6, and a missing (.) pay level gets its own bin 7.
gen expdum = cond(tchr_exp_pay_level==., 7, min(tchr_exp_pay_level, 6))
assert expdum<=7 & floor(expdum)==expdum

* Strip the subject suffix from the identifier variables.
foreach stub in schid lea schlcode teachid section sidx {
	ren `stub'_`ss' `stub'
}

* Student counter; after the collapse it holds the number of students per group
* and serves as the frequency weight in the regression.
gen numstudentstidy = 1
collapse (mean) exp* tchr_exp_pay_level vijt DISAD (sum) numstudentstidy, by(tid sch_by_yr schid tidy teachid year)

reghdfe vijt i.expdum [fw=numstudentstidy], absorb(tch_e0_sfe=tid sfe=schid year) res(resid_tch_sfe) vce(cluster teachid)

* Row vectors of coefficients (a) and standard errors (b) for bins 1 to 7.
mat a = _b[1.expdum]
mat b = _se[1.expdum]
forv bin=2/7 {
	mat a = (a,_b[`bin'.expdum])
	mat b = (b,_se[`bin'.expdum])
}

* Placeholder variables: frmttable (option va) takes the row labels from the
* variable labels of the variables named in the matrix row names.
gen avar = .
label var avar "Estimate"
matrix rownames a = avar

gen bvar = .
label var bvar "Standard Error"
matrix rownames b = bvar

matrix j=a\b

frmttable using "$tables/experience_returns_ma", statmat(j) replace va tex fra ///
	ctitles("","1","2","3","4","5","6","7+") ///
	sdec(3,3,3,3,3,3,3 \ 3,3,3,3,3,3,3)
	
*** Table and figure for VAM structural parameters
* Step 1 reads the bootstrap draws and stores, for each parameter, the standard
* deviation across draws and a lower/upper bound in global macros.
* Step 2 reads the point estimates and writes a four-column LaTeX table
* (estimate, standard error, lower bound, upper bound).

use "$temp/production_structural_param_boot", clear

* Variances -> standard deviations; covariances -> correlations.
forv m=1/2 {
	gen sigma_eps`m' = sqrt(sigma2_eps`m')
	gen sigma_muj`m' = sqrt(sigma2_muj`m')
	gen sigma_theta`m' = sqrt(sigma2_theta`m')
}

gen corr_theta = sigma_theta1theta2 / (sigma_theta1 * sigma_theta2)
gen corr_mu_0 = sigma_muj1muj2 / (sigma_muj1 * sigma_muj2)

foreach var in sigma_eps1 sigma_eps2 sigma_muj1 sigma_muj2 sigma_theta1 sigma_theta2 corr_theta corr_mu_0 {
	qui summ `var'
	global `var'_se = r(sd)
	* Bounds are the 3rd and 98th sorted draws.
	* NOTE(review): this presumably assumes exactly 100 bootstrap draws with no
	* missing values (sort puts missing values last) -- TODO confirm.
	sort `var'
	global `var'_lb = `var'[3]
	global `var'_ub = `var'[98]
}

local ss = "ma"
* clear is the documented option of use for discarding the data in memory
* (the original passed replace here).
use "$temp/VAstructural_`ss'", clear

keep if _n==1

forv m=1/2 {
	gen sigma_eps`m' = sqrt(sigma2_eps`m')
	gen sigma_muj`m' = sqrt(sigma2_muj`m')
	gen sigma_theta`m' = sqrt(sigma2_theta`m')
}

gen corr_theta = sigma_theta1theta2 / (sigma_theta1 * sigma_theta2)
gen corr_mu_0 = sigma_muj1muj2 / (sigma_muj1 * sigma_muj2)

* Placeholder variables whose labels become the table row labels
* (frmttable option va looks up the labels via the matrix row names).
foreach v in avar bvar cvar dvar evar fvar gvar hvar {
	gen `v' = .
}

label var avar "$\sigma_{\epsilon 1}$"
label var bvar "$\sigma_{\epsilon 2}$"

label var cvar "$\sigma_{\theta 1}$"
label var dvar "$\sigma_{\theta 2}$"
label var evar "correlation($\theta_{c0t},\theta_{c1t}$)"

label var fvar "$\sigma_{\mu 1}$"
label var gvar "$\sigma_{\mu 2}$"
label var hvar "correlation($\mu_{j0t},\mu_{j1t}$)"

* One 1x4 row per parameter: point estimate from the first observation, then
* the bootstrap se / lb / ub globals stored above. Matrices a..h keep the same
* names and contents as before.
local params sigma_eps1 sigma_eps2 sigma_theta1 sigma_theta2 corr_theta sigma_muj1 sigma_muj2 corr_mu_0
local rows   a b c d e f g h
forv k=1/8 {
	local p   : word `k' of `params'
	local row : word `k' of `rows'
	mat `row' = (`p'[1],${`p'_se},${`p'_lb},${`p'_ub})
	matrix rownames `row' = `row'var
}

matrix j=a\b\c\d\e\f\g\h

frmttable using "$tables/structural_params_ma", statmat(j) replace va tex fra ///
	ctitles("","Estimates","Standard Errors","95\% CI Lower Bound","95\% CI Upper Bound") ///
	sdec(3,3,3,3 \ 3,3,3,3 \ 3,3,3,3 \ 3,3,3,3 \ 3,3,3,3 \ 3,3,3,3 \ 3,3,3,3 \ 3,3,3,3)



* structural parameters for other forms of type heterogeneity
* Builds a two-column table of point estimates: column 1 from the _white file
* ("Race"), column 2 from the _achHi_ma file ("Achievement").
clear matrix

local ss = "ma"
local params sigma_eps1 sigma_eps2 sigma_theta1 sigma_theta2 corr_theta sigma_muj1 sigma_muj2 corr_mu_0
local rows   a b c d e f g h

foreach het in white achHi_ma {

	* clear is the documented option of use for discarding the data in memory
	* (the original passed replace for the first file).
	use "$temp/VAstructural_`ss'_`het'", clear

	keep if _n==1

	* Variances -> standard deviations; covariances -> correlations.
	forv m=1/2 {
		gen sigma_eps`m' = sqrt(sigma2_eps`m')
		gen sigma_muj`m' = sqrt(sigma2_muj`m')
		gen sigma_theta`m' = sqrt(sigma2_theta`m')
	}

	gen corr_theta = sigma_theta1theta2 / (sigma_theta1 * sigma_theta2)
	gen corr_mu_0 = sigma_muj1muj2 / (sigma_muj1 * sigma_muj2)

	* Append this file's estimate as a new column of each row matrix;
	* nullmat() lets the first pass start from a matrix that does not exist yet.
	forv k=1/8 {
		local p   : word `k' of `params'
		local row : word `k' of `rows'
		mat `row' = (nullmat(`row'), `p'[1])
	}
}

* Placeholder variables whose labels become the table row labels. They only
* need to exist in the dataset in memory when frmttable runs, so they are
* created once, after the last file is loaded.
foreach v in avar bvar cvar dvar evar fvar gvar hvar {
	gen `v' = .
}

label var avar "$\sigma_{\epsilon 1}$"
label var bvar "$\sigma_{\epsilon 2}$"

label var cvar "$\sigma_{\theta 1}$"
label var dvar "$\sigma_{\theta 2}$"
label var evar "correlation($\theta_{c0t},\theta_{c1t}$)"

label var fvar "$\sigma_{\mu 1}$"
label var gvar "$\sigma_{\mu 2}$"
label var hvar "correlation($\mu_{j0t},\mu_{j1t}$)"

foreach row of local rows {
	matrix rownames `row' = `row'var
}

matrix j=a\b\c\d\e\f\g\h

frmttable using "$tables/structural_params_ma_het", statmat(j) replace va tex fra ///
	ctitles("","Race","Achievement") ///
	sdec(3,3 \ 3,3 \ 3,3 \ 3,3 \ 3,3 \ 3,3 \ 3,3 \ 3,3)

* VA distribution
* Loads the drift VA estimates, attaches the homogeneous-model estimates,
* keeps one school per teacher-year and builds transfer indicators.

use "$basedata/va_estimates_drift", clear

sort j t s

keep if s!=.

ren Abar_ma Abar_ma_heterogeneous
ren mu_jt_hat_preY_ma mu_jt_hat_preY_ma_heterogeneous

sort j t schlcode lea

* Drop every key that appears more than once so the master side of the merge
* is unique on (j t schlcode lea).
bys j t schlcode lea: egen num_obs = count(s)
tab num_obs
drop if num_obs>1
drop num_obs

merge 1:n j t schlcode lea using "$basedata/va_homogeneous_estimates_drift", keepusing(mu_jt_hat_preY_ma Abar_ma)
drop _m

* The using file may hold several rows per key; drop keys duplicated by the merge.
bys j t schlcode lea: egen num_obs = count(s)
tab num_obs
drop if num_obs>1
drop num_obs

ren Abar_ma Abar_ma_homogeneous
ren mu_jt_hat_preY_ma mu_jt_hat_preY_ma_homogeneous


*** Check for forecast unbiasedness and whether it varies with transfer

* Using-only rows from the merge have no s; drop them.
keep if s!=.

* Keep teacher-years observed in exactly one school so (j t) is a valid panel key.
bys j t: egen num_schools_t = count(s)
tab num_schools_t
drop if num_schools_t>1

sort j t s

xtset j t

egen lea_code = group(lea)

* Numeric group code of the focal LEA (identity masked as XXX).
* The comment must use a plain double slash: a triple-slash continuation
* marker would glue the next line onto this command, so the command would be
* invalid and FOCAL would never be defined.
qui summ lea_code if lea=="XXX" // hide identity
local FOCAL = r(mean)

* Transfer indicators, defined only where the previous year's observation
* exists (j==L.j & t==L.t+1); missing otherwise.
gen transfer = s!=L.s if j==L.j & t==L.t+1
gen transfer_lea = lea_code!=L.lea_code if j==L.j & t==L.t+1
gen transfer_wi = s!=L.s & lea_code==L.lea_code if j==L.j & t==L.t+1
gen transfer_FOCAL = s!=L.s & lea_code==L.lea_code & lea_code==`FOCAL' if j==L.j & t==L.t+1
* No if-qualifier here, so this one is 0 (not missing) when transfer_wi is missing.
gen transfer_wi_nonFOCAL = transfer_wi==1 & transfer_FOCAL==0

* has_transferred*: the transfer flag of the current year, switched to 1 when
* the previous year's flag was 1.
gen has_transferred = transfer
replace has_transferred = 1 if j==L.j & L.transfer == 1

foreach sfx in _lea _wi _FOCAL _wi_nonFOCAL {
	gen has_transferred`sfx' = transfer`sfx'
	replace has_transferred`sfx' = 1 if j==L.j & L.transfer`sfx' == 1
}

* Start from an empty set of stored estimates.
eststo clear

*** Check for whether forecast unbiasedness is affected by the type of change in number of students

* Range of years t for which the previous year's observation of the same teacher exists.
qui summ t if j==L.j & t==L.t+1
local mint = r(min)
local maxt = r(max)

* Total student count = sum of the m1 and m2 counts.
gen tot_n_ma = n_ct_m1_ma+n_ct_m2_ma

foreach ss in ma { 

	* cum_n: for each year tt, the teacher's mean total count over all years before tt.
	gen cum_n_`ss' = .
	
forv tt=`mint'/`maxt' {
	cap drop tempvar1 tempvar2
	gen tempvar1 = tot_n_`ss' if t<`tt'
	bys j: egen tempvar2 = mean(tempvar1)
	replace cum_n_`ss' = tempvar2 if t==`tt'
}
* Current count minus the teacher's mean count in earlier years.
gen diff_n_`ss' = tot_n_`ss' - cum_n_`ss'

qui summ diff_n_`ss', d


* 10th and 90th percentile of the change, over all observations.
local p10 = r(p10)
local p90 = r(p90)


* Three bins of the change. A missing diff_n gives 0 in all three bins.
* NOTE(review): an observation exactly equal to p90 falls in both the middle
* bin (<=) and the top bin (>=) -- confirm whether the overlap is intended.
gen diff_neg_`ss'_1090 = diff_n_`ss'<`p10'
gen diff_zer_`ss'_1090 = diff_n_`ss'>=`p10' & diff_n_`ss'<=`p90'
gen diff_pos_`ss'_1090 = diff_n_`ss'>=`p90' & diff_n_`ss'!=.

foreach type in preY { 


* VA interacted with each bin indicator.
gen VA_`type'_`ss'_neg_1090 = mu_jt_hat_`type'_`ss'_heterogeneous * diff_neg_`ss'_1090
gen VA_`type'_`ss'_zer_1090 = mu_jt_hat_`type'_`ss'_heterogeneous * diff_zer_`ss'_1090
gen VA_`type'_`ss'_pos_1090 = mu_jt_hat_`type'_`ss'_heterogeneous * diff_pos_`ss'_1090
}

}


foreach ss in ma { 
	
if "`ss'"=="ma" {
	local subj = "Math"
}



label var Abar_`ss'_heterogeneous "Mean Res"

* Regress the mean residual on the three VA-by-bin terms, clustering on j.
* NOTE(review): the bin indicator is never missing (see above), so the
* diff_pos != . filter does not exclude any observation -- confirm intent.
reg Abar_`ss'_heterogeneous VA_preY_`ss'_neg_1090 VA_preY_`ss'_zer_1090 VA_preY_`ss'_pos_1090 if j==L.j & t==L.t+1 & diff_pos_`ss'_1090!=., vce(cluster j)
local clust = e(N_clust)
* Joint test that all three coefficients equal 1.
test VA_preY_`ss'_neg_1090=VA_preY_`ss'_zer_1090=VA_preY_`ss'_pos_1090=1
local p = r(p)
local p = round(`p',.001)
* Attach p-value, dependent-variable summary, cluster count and subject, then store.
estadd local pval "`p'"
estadd ysumm
estadd local clust "`clust'"
estadd local subj "`subj'"
eststo split_preY_`ss'_1090

label var Abar_`ss'_heterogeneous "Mean Res"
label var VA_preY_`ss'_neg_1090 "VA -- below 10th (size)"
label var VA_preY_`ss'_zer_1090 "VA -- 10th-90th (size)"
label var VA_preY_`ss'_pos_1090 "VA -- above 90th (size)"
}



* VA distribution
* Reloads the drift VA estimates from disk, attaches the homogeneous-model
* estimates, keeps one school per teacher-year and builds transfer indicators.

use "$basedata/va_estimates_drift", clear

sort j t s

keep if s!=.

ren Abar_ma Abar_ma_heterogeneous
ren mu_jt_hat_preY_ma mu_jt_hat_preY_ma_heterogeneous

sort j t schlcode lea

* Drop every key that appears more than once so the master side of the merge
* is unique on (j t schlcode lea).
bys j t schlcode lea: egen num_obs = count(s)
tab num_obs
drop if num_obs>1
drop num_obs

merge 1:n j t schlcode lea using "$basedata/va_homogeneous_estimates_drift", keepusing(mu_jt_hat_preY_ma Abar_ma)
drop _m

* The using file may hold several rows per key; drop keys duplicated by the merge.
bys j t schlcode lea: egen num_obs = count(s)
tab num_obs
drop if num_obs>1
drop num_obs

ren Abar_ma Abar_ma_homogeneous
ren mu_jt_hat_preY_ma mu_jt_hat_preY_ma_homogeneous


*** Check for forecast unbiasedness and whether it varies with transfer

* Using-only rows from the merge have no s; drop them.
keep if s!=.

* Keep teacher-years observed in exactly one school so (j t) is a valid panel key.
bys j t: egen num_schools_t = count(s)
tab num_schools_t
drop if num_schools_t>1

sort j t s

xtset j t

egen lea_code = group(lea)

* Numeric group code of the focal LEA (identity masked as XXX).
* The comment must use a plain double slash: a triple-slash continuation
* marker would glue the next line onto this command, so the command would be
* invalid and FOCAL would never be defined.
qui summ lea_code if lea=="XXX" // hide identity
local FOCAL = r(mean)

* Transfer indicators, defined only where the previous year's observation
* exists (j==L.j & t==L.t+1); missing otherwise.
gen transfer = s!=L.s if j==L.j & t==L.t+1
gen transfer_lea = lea_code!=L.lea_code if j==L.j & t==L.t+1
gen transfer_wi = s!=L.s & lea_code==L.lea_code if j==L.j & t==L.t+1
gen transfer_FOCAL = s!=L.s & lea_code==L.lea_code & lea_code==`FOCAL' if j==L.j & t==L.t+1
* No if-qualifier here, so this one is 0 (not missing) when transfer_wi is missing.
gen transfer_wi_nonFOCAL = transfer_wi==1 & transfer_FOCAL==0

* has_transferred*: the transfer flag of the current year, switched to 1 when
* the previous year's flag was 1.
gen has_transferred = transfer
replace has_transferred = 1 if j==L.j & L.transfer == 1

foreach sfx in _lea _wi _FOCAL _wi_nonFOCAL {
	gen has_transferred`sfx' = transfer`sfx'
	replace has_transferred`sfx' = 1 if j==L.j & L.transfer`sfx' == 1
}



* Forecast-validation regressions. Each model is estimated on observations
* whose previous-year observation exists (j==L.j & t==L.t+1), clustered on j,
* and stored with its test p-value, dependent-variable summary, cluster count
* and subject label. The statement order matters: test and estadd read the
* e() and r() results left by the preceding command.
foreach type in preY { 

foreach ss in ma { 
	
if "`ss'"=="ma" {
	local subj = "Math"
}	



* Model 1: mean residual on VA; test that the VA coefficient equals 1.
reg Abar_`ss'_heterogeneous mu_jt_hat_`type'_`ss'_heterogeneous if j==L.j & t==L.t+1, vce(cluster j)
local clust = e(N_clust)
test mu_jt_hat_`type'_`ss'_heterogeneous=1
local p = r(p)
local p = round(`p',.001)
estadd local pval "`p'"
estadd ysumm
estadd local clust "`clust'"
estadd local subj "`subj'"
eststo het_`type'_`ss'

* Model 2: mean residual on the post-transfer indicator; test that it equals 0.
reg Abar_`ss'_heterogeneous has_transferred if j==L.j & t==L.t+1, vce(cluster j)
local clust = e(N_clust)
test has_transferred=0
local p = r(p)
local p = round(`p',.001)
estadd local pval "`p'"
estadd ysumm
estadd local clust "`clust'"
estadd local subj "`subj'"
eststo transfer_`type'_`ss'



* VA interacted with each post-transfer indicator. Only VA_post (the last
* one) enters a regression in this section; the other three are only labelled.
gen VA_post_lea_`type'_`ss' = mu_jt_hat_`type'_`ss'_heterogeneous * has_transferred_lea
gen VA_post_wi_nonFOCAL_`type'_`ss' = mu_jt_hat_`type'_`ss'_heterogeneous * has_transferred_wi_nonFOCAL
gen VA_post_FOCAL_`type'_`ss' = mu_jt_hat_`type'_`ss'_heterogeneous * has_transferred_FOCAL
gen VA_post_`type'_`ss' = mu_jt_hat_`type'_`ss'_heterogeneous * has_transferred


* Model 3: VA, VA x post-transfer and post-transfer; test that the VA
* coefficient plus the interaction coefficient equals 1.
reg Abar_`ss'_heterogeneous mu_jt_hat_`type'_`ss'_heterogeneous VA_post_`type'_`ss' has_transferred if j==L.j & t==L.t+1, vce(cluster j)
test mu_jt_hat_`type'_`ss'_heterogeneous+VA_post_`type'_`ss'=1
local p = r(p)
local p = round(`p',.001)
estadd local pval "`p'"
local clust = e(N_clust)
estadd ysumm
estadd local clust "`clust'"
estadd local subj "`subj'"
eststo transfer_int_`type'_`ss'

* Wedges: m2 minus m1, for the mean residual and for the VA estimate.
gen Abar_wedge_`ss' = Abar_m2_`ss'-Abar_m1_`ss'
gen mu_jt_preY_wedge_`ss' =  mu_jt_m2_hat_preY_`ss' - mu_jt_m1_hat_preY_`ss'

* Model 4: residual wedge on VA wedge; test that the coefficient equals 1.
reg Abar_wedge_`ss' mu_jt_preY_wedge_`ss' if j==L.j & t==L.t+1, vce(cluster j)
local clust = e(N_clust)
test mu_jt_preY_wedge_`ss'=1
local p = r(p)
local p = round(`p',.001)
estadd local pval "`p'"
estadd ysumm
estadd local clust "`clust'"
estadd local subj "`subj'"
eststo het_`type'_`ss'_wedge

* Variable labels used as row labels when the stored estimates are tabulated.
label var Abar_`ss'_heterogeneous "Mean Res"
label var Abar_`ss'_homogeneous "Mean Res"
label var Abar_wedge_`ss' "Mean Diff"
label var mu_jt_hat_`type'_`ss'_heterogeneous "VA (Heterog)"
label var mu_jt_hat_`type'_`ss'_homogeneous "VA (Homog)"
label var mu_jt_preY_wedge_`ss' "VA Diff"

label var VA_post_lea_`type'_`ss' "VA * Post LEA Transfer"
label var VA_post_wi_nonFOCAL_`type'_`ss' "VA * Post w/i LEA non-FOCAL Transfer"
label var VA_post_FOCAL_`type'_`ss' "VA * Post w/i FOCAL Transfer"
label var VA_post_`type'_`ss' "VA * Post Transfer"

}

label var has_transferred_lea "Post-LEA Transfer"
label var has_transferred_wi_nonFOCAL "Post-w/i LEA non-FOCAL Transfer"
label var has_transferred_FOCAL "Post-w/i FOCAL Transfer"
label var has_transferred "Post Transfer"


	
}

* Binned scatter of mean residual against VA, exported as PNG.
binscatter Abar_ma_heterogeneous mu_jt_hat_preY_ma_heterogeneous if j==L.j & t==L.t+1, xtitle("Math VA (using Prior Data)") ytitle("Mean (Residualized) Math Scores")
gr export "$figures/VA_preY_validation_binscatter_ma.png", replace

* Same plot for the m2-minus-m1 wedges.
binscatter Abar_wedge_ma mu_jt_preY_wedge_ma if j==L.j & t==L.t+1, xtitle("Difference in VA (using Prior Data)") ytitle("Difference in Mean (Resid.) Math Scores")
gr export "$figures/VA_preY_validation_binscatter_ma_wedge.png", replace


*** Check for whether forecast unbiasedness is affected by the type of change in student composition 
* Splits observations by how far the current m2 share (p_m2) lies from the
* teacher's mean share in earlier years, regresses the mean residual on VA
* interacted with the three bins, and writes the combined validation table.

* First and last year for which the previous year's observation exists.
qui summ t if j==L.j & t==L.t+1
local yr_first = r(min)
local yr_last = r(max)

foreach subject in ma {

	if "`subject'"=="ma" {
		local subj = "Math"
	}

	* Teacher's mean m2 share over all years strictly before the current one.
	gen cum_p_m2_`subject' = .
	forv yr=`yr_first'/`yr_last' {
		cap drop tempvar1 tempvar2
		gen tempvar1 = p_m2_`subject' if t<`yr'
		bys j: egen tempvar2 = mean(tempvar1)
		replace cum_p_m2_`subject' = tempvar2 if t==`yr'
	}
	gen diff_p_m2_`subject' = p_m2_`subject' - cum_p_m2_`subject'

	* 10th / 90th percentile cutoffs of the change in share.
	qui summ diff_p_m2_`subject', d
	local cut_lo = r(p10)
	local cut_hi = r(p90)

	* Bin indicators (0 in all three bins when the change is missing).
	gen diffp_neg_`subject'_1090 = diff_p_m2_`subject'<`cut_lo'
	gen diffp_zer_`subject'_1090 = diff_p_m2_`subject'>=`cut_lo' & diff_p_m2_`subject'<=`cut_hi'
	gen diffp_pos_`subject'_1090 = diff_p_m2_`subject'>=`cut_hi' & diff_p_m2_`subject'!=.

	* VA interacted with each bin.
	foreach kind in preY {
		foreach bin in neg zer pos {
			gen VA_`kind'_`subject'_`bin'_1090p = mu_jt_hat_`kind'_`subject'_heterogeneous * diffp_`bin'_`subject'_1090
		}
	}

	reg Abar_`subject'_heterogeneous VA_preY_`subject'_neg_1090p VA_preY_`subject'_zer_1090p VA_preY_`subject'_pos_1090p if j==L.j & t==L.t+1 & diffp_pos_`subject'_1090!=., vce(cluster j)
	local nclust = e(N_clust)
	* Joint test that all three coefficients equal 1.
	test VA_preY_`subject'_neg_1090p=VA_preY_`subject'_zer_1090p=VA_preY_`subject'_pos_1090p=1
	local pv = r(p)
	local pv = round(`pv',.001)
	estadd local pval "`pv'"
	estadd ysumm
	estadd local clust "`nclust'"
	estadd local subj "`subj'"
	eststo split_preY_`subject'_1090p

	label var Abar_`subject'_heterogeneous "Mean Res"
	label var VA_preY_`subject'_neg_1090p "VA -- below 10th (disadv)"
	label var VA_preY_`subject'_zer_1090p "VA -- 10th-90th (disadv)"
	label var VA_preY_`subject'_pos_1090p "VA -- above 90th (disadv)"
}

* Empty placeholder variables carrying the labels for the coefficients of the
* stored model split_preY_ma_1090, so esttab (option label) can label its rows.
foreach bin in neg zer pos {
	gen VA_preY_ma_`bin'_1090 = .
}
label var VA_preY_ma_neg_1090 "VA -- below 10th (size)"
label var VA_preY_ma_zer_1090 "VA -- 10th-90th (size)"
label var VA_preY_ma_pos_1090 "VA -- above 90th (size)"

* Combined forecast-validation table.
esttab het_preY_ma het_preY_ma_wedge transfer_preY_ma transfer_int_preY_ma split_preY_ma_1090p split_preY_ma_1090    ///
	using "$tables/vam_forecast_validation_combined_ma_order.tex", replace se label booktabs ///
	nostar   ///
	substitute(\_ _)  ///
	keep (mu_jt_hat_preY_*_heterogeneous has_transferred VA_post_* VA_preY_*_neg* VA_preY_*_zer* VA_preY_*_pos* mu_jt_preY_wedge* _cons) ///
	stats(subj ymean clust N, labels ("Subject" "Mean DV" "Clusters" "N")) nonote nonumbers 

*** Create tables for first-best using all teachers in a year

* Year used by the first-best exercise (filters sy and t further down),
* and the year before it.
global year = 2016
global year_minus_1 = $year-1

cap program drop allocation
program define allocation
	* Evaluates one teacher-to-classroom assignment and appends the resulting
	* per-student output to the potential_gains file.
	*
	* Positional arguments:
	*   infile   suffix of the assignments csv to read
	*   nsims    number of assignment columns in that csv (1 = single assignment,
	*            more than 1 = VA is averaged across the columns)
	*   cnt1     variable with the student count multiplied by VA1
	*   cnt2     variable with the student count multiplied by VA2
	*   outname  suffix of the total_output variables that are saved
	*   trim     1 = replace the VA of the bottom 5 percent of teachers by the median
	*   sfx      suffix of the temp files (assignments, tempteachers, tempassign,
	*            potential_gains)
	args infile nsims cnt1 cnt2 outname trim sfx

	* Each row of the csv is one classroom; each column v# holds a teacher index.
	if `nsims' >= 1 {
		insheet using "$temp/assignments`sfx'_`infile'.csv", comma clear
		gen classroom_index = _n
	}

	if `nsims' == 1 {
		ren v1 teacher_index
		sort teacher_index
		merge 1:1 teacher_index using "$temp/tempteachers`sfx'"
		assert _m==3
		drop _m
	}
	else if `nsims' > 1 {
		* Attach the teacher VA for every column, then average over columns.
		forv draw=1/`nsims' {
			ren v`draw' teacher_index
			sort teacher_index
			merge 1:1 teacher_index using "$temp/tempteachers`sfx'"
			assert _m==3
			drop _m
			ren VA1_sort VA1_sim`draw'
			ren VA2_sort VA2_sim`draw'
			drop teacher_index
		}
		egen VA1_sort = rowmean(VA1_sim*)
		egen VA2_sort = rowmean(VA2_sim*)
		drop VA1_sim* VA2_sim*
	}

	* Attach the classroom characteristics.
	sort classroom_index
	merge 1:1 classroom_index using "$temp/tempassign`sfx'"
	assert _m==3
	drop _m

	if `trim' == 1 {
		* Teachers at or below the 5th percentile of share-weighted VA get the
		* median VA1 and median VA2 instead of their own.
		gen per_student_output = VA1_sort * p_m1_ma + VA2_sort * p_m2_ma
		qui summ per_student_output, d
		local cutoff = r(p5)
		gen replaceteacher = per_student_output<=`cutoff'
		qui summ VA1_sort, d
		local median1 = r(p50)
		qui summ VA2_sort, d
		local median2 = r(p50)
		replace VA1_sort = `median1' if replaceteacher==1
		replace VA2_sort = `median2' if replaceteacher==1
	}

	* Classroom output = VA times student count, by type and in total.
	gen output1 = VA1_sort * `cnt1'
	gen output2 = VA2_sort * `cnt2'
	gen output = output1 + output2

	egen tot_output1=total(output1)
	egen tot_output2=total(output2)
	egen tot_output=total(output)

	* Output per student, by type and overall.
	gen output_per_student1=tot_output1/tot_student1
	gen output_per_student2=tot_output2/tot_student2 
	gen output_per_student=tot_output/tot_student

	qui summ output_per_student 
	local gain_all = r(mean)
	qui summ output_per_student1
	local gain_m1 = r(mean)
	qui summ output_per_student2
	local gain_m2 = r(mean)

	* Add the three results as new variables to the running results file.
	* capture: if a variable of that name already exists it is left untouched.
	use "$temp/potential_gains`sfx'", clear
	cap gen total_output_`outname' = `gain_all'
	cap gen total_output_`outname'_m1 = `gain_m1'
	cap gen total_output_`outname'_m2 = `gain_m2'
	save "$temp/potential_gains`sfx'", replace

end

* For each heterogeneity suffix (only the empty suffix here): build the
* teacher and classroom inputs for the assignment problem, export them for
* the external solver, and evaluate each candidate assignment with the
* allocation program.
foreach hh in "" {

	* School-level Title I flags for the analysis year.
	use "$basedata/FOCAL_school_year_data", clear
	keep if sy==$year
	collapse (max) titleI titleI_eligible, by(ncerdc_lea ncerdc_schlcode)
	tempfile tempTitleI
	save `tempTitleI', replace

	use "$basedata/va_estimates_drift`hh'", clear

	* Plain double-slash comment: a triple-slash continuation marker here would
	* join whatever line follows onto this command.
	keep if t==$year & s!=. & lea_ma=="XXX" // hide identity

	ren j ncerdc_id
	ren t sy

	* Attach the grade range of each teacher-year.
	sort ncerdc_id sy
	merge 1:n ncerdc_id sy using "$basedata/allLEA_teacher_year_data", keepusing(grade*)
	assert _m!=1
	keep if _m==3
	drop _m

	ren (ncerdc_id sy) (j t)

	ren (lea_ma schlcode_ma) (ncerdc_lea ncerdc_schlcode)
	destring ncerdc_lea ncerdc_schlcode, replace

	sort ncerdc_lea ncerdc_schlcode
	merge n:1 ncerdc_lea ncerdc_schlcode using `tempTitleI'
	drop if _m==2
	drop _m

	keep if mu_jt_m1_hat_preY_ma!=. & mu_jt_m2_hat_preY_ma!=. & p_m1_ma!=. & p_m2_ma!=. & e_ma!=.

	keep if inlist(grade_low,"KG","PK") & inlist(grade_high,"04","05")

	* The two shares must sum to one. Compared with a small tolerance because
	* an exact equality test on floating-point sums can fail from rounding alone.
	assert abs(p_m1_ma+p_m2_ma-1)<1e-6

	gen n_ct_ma = n_ct_m1_ma + n_ct_m2_ma

	if "`hh'"=="" {
		local l1 = "Disadvantaged"
	}
	if "`hh'"=="_white" {
		local l1 = "White"
	}
	if "`hh'"=="_achHi_ma" {
		local l1 = "High Achieving"
	}

	* School-level mean class counts by student type.
	bys s: egen mean_n1 = mean(n_ct_m1_ma)
	bys s: egen mean_n2 = mean(n_ct_m2_ma)
	keep if mu_jt_m1_hat_preY_ma!=. & mu_jt_m2_hat_preY_ma!=. & n_ct_m1_ma!=. & n_ct_m2_ma!=. & e_ma!=. & s!=.

	sort j

	* Export the inputs for the external assignment solver.
	preserve

	keep mu_jt_m1_hat_preY_ma mu_jt_m2_hat_preY_ma mean_n1 mean_n2 e_ma s n_ct_m1_ma n_ct_m2_ma
	order mu_jt_m1_hat_preY_ma mu_jt_m2_hat_preY_ma mean_n1 mean_n2 e_ma s n_ct_m1_ma n_ct_m2_ma
	outsheet using "$temp/hungarian_input`hh'.csv", comma replace

	** run potential_gains.m in matlab

	restore

	gen ca_ma = mu_jt_m2_hat_preY_ma-mu_jt_m1_hat_preY_ma

	ren (mu_jt_m1_hat_preY_ma mu_jt_m2_hat_preY_ma) (VA1 VA2)

	gen n_m2_m1=n_ct_m2_ma-n_ct_m1_ma
	gen n_students= n_ct_m1_ma +  n_ct_m2_ma

	egen tot_student=total(n_students)
	egen tot_student1=total(n_ct_m1_ma)
	egen tot_student2=total(n_ct_m2_ma)

	* Row number doubles as classroom index and, below, as teacher index.
	gen classroom_index = _n

	save "$temp/tempassign`hh'", replace

	ren classroom_index teacher_index
	gen VA1_sort = VA1
	gen VA2_sort = VA2
	keep teacher_index VA1_sort VA2_sort

	save "$temp/tempteachers`hh'", replace

	* One-row results file that the allocation calls extend with new variables.
	clear
	set obs 1

	gen counter = 1

	save "$temp/potential_gains`hh'", replace

	* Arguments of allocation:
	* 1: suffix for input file
	* 2: number of simulations
	* 3: count variable1
	* 4: count variable2
	* 5: suffix for saving
	* 6: replace bottom 5% of teachers (allocation uses the p5 cutoff)
	* 7: suffix

	allocation "best" 1 mean_n1 mean_n2 "best" 0 "`hh'"
	allocation "worst" 1 mean_n1 mean_n2 "worst" 0 "`hh'"
	allocation "wiexp" 1 mean_n1 mean_n2 "wiexp" 0 "`hh'"
	allocation "wis" 1 n_ct_m1_ma n_ct_m2_ma "wis" 0 "`hh'"
	allocation "maxm1" 1 mean_n1 mean_n2 "maxm1" 0 "`hh'"
	allocation "maxm2" 1 mean_n1 mean_n2 "maxm2" 0 "`hh'"
	allocation "actual" 1 mean_n1 mean_n2 "actual" 0 "`hh'"
	allocation "actual" 1 n_ct_m1_ma n_ct_m2_ma "actual_NC" 0 "`hh'"
	allocation "random" 100 mean_n1 mean_n2 "random" 0 "`hh'"

	if "`hh'"=="" {
		allocation "random_wis" 100 n_ct_m1_ma n_ct_m2_ma "random_wis" 0 "`hh'"
	}

	allocation "best_NC" 1 mean_n1 mean_n2 "best_NC" 0 "`hh'"
	allocation "actual" 1 mean_n1 mean_n2 "replace5" 1 "`hh'"

}


* Short first-best table: gain of the maxm2 assignment over the actual one.
* The suffix macro is set explicitly here. A foreach loop macro no longer
* exists once its loop has ended, so referring to it at this point only
* worked because an undefined local expands to the empty string.
local hh ""

use "$temp/potential_gains`hh'", clear

* Placeholder variables whose labels become the table row labels.
gen jvar = .
gen lvar = .

label var jvar "\emph{\textbf{Targeting Disadvantaged Students}}"
label var lvar "Max Disadvantaged VA"	

* Row 1 is an empty heading row; row 2 holds maxm2 minus actual for the
* m1 column, the m2 column and the overall column.
mat jshort = (.,.,.)
mat lshort = (total_output_maxm2_m1[1]-total_output_actual_m1[1],total_output_maxm2_m2[1]-total_output_actual_m2[1],total_output_maxm2[1]-total_output_actual[1])

mat rownames jshort = jvar
mat rownames lshort = lvar

matrix matfullshort = jshort\lshort

frmttable using "$tables/first_best_all_teachers_alternate_policies`hh'_short", statmat(matfullshort) replace va tex fra ///
	ctitles("","Adv","Disadv","Mean") ///
	sdec(0,0,0 \ 3,3,3)


	



* Match validation, data preparation: builds teacher-by-school aggregates,
* first differences around transfers, and an indicator for teachers whose
* VA gap (m1 minus m2) is below the weighted median.
use "$basedata/va_estimates_drift.dta", clear
gen ncerdc_id=j
gen sy=t

* keep math teachers and drop reading multiple schools
drop if s==.
* The same multi-school filter is applied twice in a row.
bys j t: egen num_schools = count(s)
drop if num_schools>1 & mu_t_m1_hat_career_ma==. 
drop num_schools
bys j t: egen num_schools = count(s)
drop if num_schools>1 & mu_t_m1_hat_career_ma==. 
drop num_schools

* Keep only teacher-years found in both files.
merge m:1 ncerdc_id sy using "$basedata/allLEA_teacher_year_data"
drop if _m==1 | _m==2
drop _m 

******deal with duplicates
* j and t already exist (they are used above), so these two captured gen
* commands fail silently and change nothing.
cap gen j=ncerdc_id
cap gen t=sy
* dup is created here but not referenced again in this section.
duplicates tag j t, generate(dup)
drop if ncerdc_id==.
drop if multi_school_ma==1 
* Fill a missing overall mean residual from the m1 value, then from the m2 value.
foreach i in ma  {
replace Abar_`i'= Abar_m1_`i' if Abar_`i'==.
replace Abar_`i'= Abar_m2_`i' if Abar_`i'==.
}
* Drop observations that still have no mean residual.
egen temp=rowmax( Abar_ma )
egen sc=group(schlcode_salary lea_salary)
drop if temp==.
drop temp
egen mu_pres=rowmax(mu*)
egen min_sy=min(sy) if mu_pres!=. , by(ncerdc_id)
* NOTE(review): transfer is not created in this section; presumably it comes
* from the merged teacher-year file -- confirm.
egen trans_sum=sum(transfer), by (ncerdc_id)
gen year_transfer = sy if transfer==1
egen school_id=group(ncerdc_schlcode ncerdc_lea)
egen trans_max=max(transfer), by(ncerdc_id)
egen tch_schl=group(ncerdc_id ncerdc_lea ncerdc_schlcode)
egen tag=tag(ncerdc_id ncerdc_lea ncerdc_schlcode) 
 xtset j t 
* temp = 1 when the next year's observation is a transfer, else 0.
* NOTE(review): the expression above yields 0 or 1 and never missing, so the
* condition temp==. in the two replace statements below is never true and
* they change nothing. If they were meant to catch transfers after a gap
* year, temp would have to start out missing -- confirm the intent.
gen temp=(f.transfer==1)
replace temp=1 if f2.transfer==1 & temp==.
replace temp=1 if f3.transfer==1 & temp==. & f.temp==.
* Mean experience level within teacher-school; missing is coded 99 and flagged.
egen exp_tch_schl=mean(tchr_exp_pay_level), by(j s)
gen exp=tchr_exp_pay_level
gen expmiss=(exp_tch_schl==.)
replace exp_tch_schl=99 if exp_tch_schl==.


* Teacher-school spell flags: spell contains a pre-transfer year / a transfer year.
egen  pre_transfer=max(temp), by(ncerdc_id ncerdc_lea ncerdc_schlcode)
egen post_transfer=max(transfer), by(ncerdc_id ncerdc_lea ncerdc_schlcode)
drop temp
foreach i in ma  {
* Yearly student count and m2 share.
gen stu_count_VA_1yr`i'=n_ct_m1_`i'+n_ct_m2_`i' 
cap gen DISAD_share_1yr`i' =  n_ct_m2_`i'/(n_ct_m1_`i'+n_ct_m2_`i')
* Student-weighted means within teacher-school (j s).
egen schl_stu`i'=sum(n_ct_m1_`i'+n_ct_m2_`i'), by(j s)
egen Abarw`i'=sum(Abar_`i'*(n_ct_m1_`i'+n_ct_m2_`i')/schl_stu`i'), by(j s)
egen DISAD_sharew`i'=sum(DISAD_share_1yr`i'*(n_ct_m1_`i'+n_ct_m2_`i')/schl_stu`i'), by(j s)
* First differences; in transfer years a missing one-year difference is
* replaced by the two-year difference.
gen d_Abarw`i'=Abarw`i'-l.Abarw`i'
replace d_Abarw`i'=Abarw`i'-l2.Abarw`i' if d_Abarw`i'==. & transfer==1
gen d_DISAD_sharew`i'=DISAD_sharew`i'-l.DISAD_sharew`i'
replace d_DISAD_sharew`i'=DISAD_sharew`i'-l2.DISAD_sharew`i' if d_DISAD_sharew`i'==. & transfer==1
* VA gap (m1 minus m2), averaged within teacher and sc group.
 gen VA_diff`i'sc=mu_jt_m1_hat_wiS_`i'-mu_jt_m2_hat_wiS_`i'
cap egen VA_diff`i'=mean( VA_diff`i'sc), by(ncerdc_id sc) 
* Pre-transfer VA gap: the lagged gap in the transfer year (second lag as
* fallback), spread over the whole teacher-school spell.
cap gen temp =l.VA_diff`i' if  transfer==1
replace temp=l2.VA_diff`i' if transfer==1 & temp==.
cap egen VA_diff`i'_pre=max(temp), by(ncerdc_id ncerdc_lea ncerdc_schlcode)
drop temp 
* Indicator: VA gap below the student-weighted median.
sum VA_diff`i' [fw=stu_count_VA_1yr`i'], detail
local med=r(p50)
gen betterDISAD`i'=(VA_diff`i'<`med' )
replace betterDISAD`i'=. if VA_diff`i'==. 
* Same indicator based on the pre-transfer gap, among transfer observations.
sum VA_diff`i'_pre [fw=stu_count_VA_1yr`i'] if transfer==1 & VA_diff`i'_pre!=., det
local med_pre=r(p50)

gen temp=(VA_diff`i'_pre<`med_pre' ) if transfer==1 & VA_diff`i'_pre!=.
cap egen b`i'_pre=max(temp), by(ncerdc_id ncerdc_lea ncerdc_schlcode)
cap gen betterDISAD`i'_pre=b`i'_pre
* For pre-transfer spells without a value, fall back to the contemporaneous indicator.
replace betterDISAD`i'_pre= betterDISAD`i' if pre_transfer==1 & betterDISAD`i'_pre==.

}

* Match validation: first-difference regressions on transfer observations,
* once without and once with controls, stored as est1 and est2 and exported.
eststo clear

foreach subject in ma  {

	cap drop better_DISAD DISADshXbetter_DISAD DISAD_share Abar

	* Generic names so the exported table has subject-free row labels.
	gen better_DISAD=betterDISAD`subject'_pre
	gen DISAD_share=d_DISAD_sharew`subject'
	gen DISADshXbetter_DISAD=d_DISAD_sharew`subject'*better_DISAD
	gen Abar=d_Abarw`subject'

	* Extra regressors of the specification with controls: cubic in mean
	* experience, missing-experience flag and year dummies.
	local ctrl_No ""
	local ctrl_Yes "exp_tch_schl c.exp_tch_schl#c.exp_tch_schl  c.exp_tch_schl#c.exp_tch_schl#c.exp_tch_schl expmiss i.sy"

	foreach spec in No Yes {
		eststo: reg Abar DISAD_share DISADshXbetter_DISAD better_DISAD `ctrl_`spec''  [fw=stu_count_VA_1yr`subject'] if transfer==1, cluster(ncerdc_id)
		estadd scalar teachers = round(e(N_clust),1)
		estadd scalar students = round(e(N), 1)
		* Weighted mean and sd of the pre-transfer VA gap in the estimation sample.
		sum VA_diff`subject'_pre [fw=stu_count_VA_1yr`subject'] if  e(sample)==1
		estadd scalar VA_diff_mn=r(mean)
		estadd scalar VA_diff_sd=r(sd)
		estadd local Model "Transfer FD"
		estadd local Controls "`spec'"
	}
}

label var Abar "Student res"
label var DISAD_share "Share disadvantaged"
label var DISADshXbetter_DISAD "Share disadvantaged x CA in disadvantaged"

esttab est1 est2 ///
using "$tables/match_validation.tex", replace se label booktabs ///
nostar   ///
substitute(\_ _)  ///
keep(DISADshXbetter_DISAD DISAD_share) ///
stats(teachers students VA_diff_mn VA_diff_sd  Controls, labels ("Num teachers" "Num students" "Mean CA" "SD CA" "Controls")) nonote nonumbers 
